#!/usr/bin/env python
# encoding: utf-8
# @summary: Extract the time, IP, region, SUV, domain and URL fields from page-view logs
# @attention: todo
# @author: hongxingfan
# @since: 2014-10-21 15:43:26

"""
Example Hadoop Streaming invocation. Generic options (-D) must be placed
before the streaming options (-input, -output, ...):

 hadoop jar /opt/sohuhadoop/hadoop/contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar  \
 -D mapred.reduce.tasks=5  \
 -input /user/autolog/pvlog/20141020/   \
 -output /user/autolog/tmp/fan/20  \
 -mapper "python Demo1Mapper.py"  \
 -reducer "python Demo1Reducer.py"  \
 -file ./Demo1*
"""

import sys

# Number of leading tab-separated columns kept from every input record.
FIELD_COUNT = 6
# Zero-based index of the column that is inspected for FILTER_MARKER.
FILTER_COLUMN = 5
# Records whose FILTER_COLUMN contains this substring are dropped.
FILTER_MARKER = "wmh-auto-tab"


def collect_unique_records(lines, log=None):
    """Return the set of distinct, truncated records found in *lines*.

    Each line is stripped of surrounding whitespace and split on tabs.
    Records whose column FILTER_COLUMN contains FILTER_MARKER are skipped.
    Every remaining record is cut down to its first FIELD_COUNT columns and
    re-joined with tabs; duplicates collapse because the results are kept
    in a set.

    Records with too few columns to hold FILTER_COLUMN cannot be filtered.
    They are still kept (as in the original script), but a diagnostic is
    written to *log* rather than to stdout, because stdout carries the
    mapper's data output and must not be mixed with debug text.

    Args:
        lines: iterable of input lines (e.g. ``sys.stdin``).
        log: file-like object receiving diagnostics; defaults to
            ``sys.stderr``.

    Returns:
        A set of tab-joined record strings.
    """
    if log is None:
        log = sys.stderr

    unique = set()
    for raw in lines:
        cols = raw.strip().split("\t")
        if len(cols) > FILTER_COLUMN:
            if FILTER_MARKER in cols[FILTER_COLUMN]:
                continue
        else:
            # An explicit length check replaces the old try/except, which
            # used Python-2-only syntax and is a SyntaxError on Python 3.
            log.write("short record (%d columns): %r\n" % (len(cols), cols))
        unique.add("\t".join(cols[:FIELD_COUNT]))
    return unique


def main():
    """Read records from stdin and write each distinct one to stdout."""
    write = sys.stdout.write
    for record in collect_unique_records(sys.stdin):
        write(record + "\n")


if __name__ == "__main__":
    main()
